# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Read colmap model."""

# pylint: disable=invalid-name
# pylint: disable=g-explicit-length-test

import collections
import os
import struct
import sys
import numpy as np


CameraModel = collections.namedtuple(
    "CameraModel", ["model_id", "model_name", "num_params"]
)
Camera = collections.namedtuple(
    "Camera", ["id", "model", "width", "height", "params"]
)
BaseImage = collections.namedtuple(
    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]
)
Point3D = collections.namedtuple(
    "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]
)


class Image(BaseImage):

  def qvec2rotmat(self):
    return qvec2rotmat(self.qvec)


CAMERA_MODELS = {
    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
    CameraModel(model_id=7, model_name="FOV", num_params=5),
    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12),
}
CAMERA_MODEL_IDS = dict(
    [(camera_model.model_id, camera_model) for camera_model in CAMERA_MODELS]
)


def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
  """Read and unpack the next bytes from a binary file.

  Args:
    fid: File object to read from.
    num_bytes: Number of bytes to read.
    format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    endian_character: Any of {@, =, <, >, !}.

  Returns:
    Tuple of read and unpacked values.
  """
  data = fid.read(num_bytes)
  return struct.unpack(endian_character + format_char_sequence, data)


def read_cameras_text(path):
  """Read cameras from a text model file."""
  cameras = {}
  with open(path, "r") as fid:
    while True:
      line = fid.readline()
      if not line:
        break
      line = line.strip()
      if len(line) > 0 and line[0] != "#":
        elems = line.split()
        camera_id = int(elems[0])
        model = elems[1]
        width = int(elems[2])
        height = int(elems[3])
        params = np.array(tuple(map(float, elems[4:])))
        cameras[camera_id] = Camera(
            id=camera_id, model=model, width=width, height=height, params=params
        )
  return cameras


def read_cameras_binary(path_to_model_file):
  """Read cameras from a binary model file."""
  cameras = {}
  with open(path_to_model_file, "rb") as fid:
    num_cameras = read_next_bytes(fid, 8, "Q")[0]
    for _ in range(num_cameras):
      camera_properties = read_next_bytes(
          fid, num_bytes=24, format_char_sequence="iiQQ"
      )
      camera_id = camera_properties[0]
      model_id = camera_properties[1]
      model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
      width = camera_properties[2]
      height = camera_properties[3]
      num_params = CAMERA_MODEL_IDS[model_id].num_params
      params = read_next_bytes(
          fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params
      )
      cameras[camera_id] = Camera(
          id=camera_id,
          model=model_name,
          width=width,
          height=height,
          params=np.array(params),
      )
    assert len(cameras) == num_cameras
  return cameras


def read_images_text(path):
  """Read images from a text model file."""
  images = {}
  with open(path, "r") as fid:
    while True:
      line = fid.readline()
      if not line:
        break
      line = line.strip()
      if len(line) > 0 and line[0] != "#":
        elems = line.split()
        image_id = int(elems[0])
        qvec = np.array(tuple(map(float, elems[1:5])))
        tvec = np.array(tuple(map(float, elems[5:8])))
        camera_id = int(elems[8])
        image_name = elems[9]
        elems = fid.readline().split()
        xys = np.column_stack(
            [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))]
        )
        point3D_ids = np.array(tuple(map(int, elems[2::3])))
        images[image_id] = Image(
            id=image_id,
            qvec=qvec,
            tvec=tvec,
            camera_id=camera_id,
            name=image_name,
            xys=xys,
            point3D_ids=point3D_ids,
        )
  return images


def read_images_binary(path_to_model_file):
  """Read images from a binary model file."""
  images = {}
  with open(path_to_model_file, "rb") as fid:
    num_reg_images = read_next_bytes(fid, 8, "Q")[0]
    for _ in range(num_reg_images):
      binary_image_properties = read_next_bytes(
          fid, num_bytes=64, format_char_sequence="idddddddi"
      )
      image_id = binary_image_properties[0]
      qvec = np.array(binary_image_properties[1:5])
      tvec = np.array(binary_image_properties[5:8])
      camera_id = binary_image_properties[8]
      image_name = ""
      current_char = read_next_bytes(fid, 1, "c")[0]
      while current_char != b"\x00":  # look for the ASCII 0 entry
        image_name += current_char.decode("utf-8")
        current_char = read_next_bytes(fid, 1, "c")[0]
      num_points2D = read_next_bytes(
          fid, num_bytes=8, format_char_sequence="Q"
      )[0]
      x_y_id_s = read_next_bytes(
          fid,
          num_bytes=24 * num_points2D,
          format_char_sequence="ddq" * num_points2D,
      )
      xys = np.column_stack(
          [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))]
      )
      point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
      images[image_id] = Image(
          id=image_id,
          qvec=qvec,
          tvec=tvec,
          camera_id=camera_id,
          name=image_name,
          xys=xys,
          point3D_ids=point3D_ids,
      )
  return images


def read_points3D_text(path):
  """Read points3D from a text model file."""
  points3D = {}
  with open(path, "r") as fid:
    while True:
      line = fid.readline()
      if not line:
        break
      line = line.strip()
      if len(line) > 0 and line[0] != "#":
        elems = line.split()
        point3D_id = int(elems[0])
        xyz = np.array(tuple(map(float, elems[1:4])))
        rgb = np.array(tuple(map(int, elems[4:7])))
        error = float(elems[7])
        image_ids = np.array(tuple(map(int, elems[8::2])))
        point2D_idxs = np.array(tuple(map(int, elems[9::2])))
        points3D[point3D_id] = Point3D(
            id=point3D_id,
            xyz=xyz,
            rgb=rgb,
            error=error,
            image_ids=image_ids,
            point2D_idxs=point2D_idxs,
        )
  return points3D


def read_points3d_binary(path_to_model_file):
  """Read points3D from a binary model file."""
  points3D = {}
  with open(path_to_model_file, "rb") as fid:
    num_points = read_next_bytes(fid, 8, "Q")[0]
    for _ in range(num_points):
      binary_point_line_properties = read_next_bytes(
          fid, num_bytes=43, format_char_sequence="QdddBBBd"
      )
      point3D_id = binary_point_line_properties[0]
      xyz = np.array(binary_point_line_properties[1:4])
      rgb = np.array(binary_point_line_properties[4:7])
      error = np.array(binary_point_line_properties[7])
      track_length = read_next_bytes(
          fid, num_bytes=8, format_char_sequence="Q"
      )[0]
      track_elems = read_next_bytes(
          fid,
          num_bytes=8 * track_length,
          format_char_sequence="ii" * track_length,
      )
      image_ids = np.array(tuple(map(int, track_elems[0::2])))
      point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
      points3D[point3D_id] = Point3D(
          id=point3D_id,
          xyz=xyz,
          rgb=rgb,
          error=error,
          image_ids=image_ids,
          point2D_idxs=point2D_idxs,
      )
  return points3D


def read_model(path, ext):
  if ext == ".txt":
    cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
    images = read_images_text(os.path.join(path, "images" + ext))
    points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
  else:
    cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
    images = read_images_binary(os.path.join(path, "images" + ext))
    points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
  return cameras, images, points3D


def qvec2rotmat(qvec):
  return np.array([
      [
          1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
          2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
          2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2],
      ],
      [
          2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
          1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
          2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1],
      ],
      [
          2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
          2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
          1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2,
      ],
  ])


def rotmat2qvec(R):
  """Rotation matrix to quaternion."""
  Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
  K = (
      np.array([
          [Rxx - Ryy - Rzz, 0, 0, 0],
          [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
          [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
          [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz],
      ])
      / 3.0
  )
  eigvals, eigvecs = np.linalg.eigh(K)
  qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
  if qvec[0] < 0:
    qvec *= -1
  return qvec


def main():
  if len(sys.argv) != 3:
    print("Usage: python read_model.py path/to/model/folder [.txt,.bin]")
    return

  cameras, images, points3D = read_model(path=sys.argv[1], ext=sys.argv[2])

  print("num_cameras:", len(cameras))
  print("num_images:", len(images))
  print("num_points3D:", len(points3D))


if __name__ == "__main__":
  main()
